---
title: "summary-statistics"
author: "Ragini Srinivasan"
date: "8/14/2021"
output: pdf_document
---

## Preliminary Work

```{r setup, include=FALSE}
# Show the source of every chunk in the knitted document.
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(estimatr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(jtools)

# Load the compiled data set and force the two attitude codes and the
# trade-route distance to numeric; entries that cannot be converted become NA.
huguenots_data <- read_csv("compiled-data.csv") %>%
  mutate(across(
    all_of(c("ruler_att_original", "city_att_original", "trade_dist")),
    ~ as.numeric(as.character(.x))
  ))
```

## Summary Statistics for Trade Route Distances

```{r}
# One subset per city-attitude code (0 to 3), keeping only the rows whose
# trade-route distance is known.
city_0_data <- filter(huguenots_data, city_att_original == 0, !is.na(trade_dist))
city_1_data <- filter(huguenots_data, city_att_original == 1, !is.na(trade_dist))
city_2_data <- filter(huguenots_data, city_att_original == 2, !is.na(trade_dist))
city_3_data <- filter(huguenots_data, city_att_original == 3, !is.na(trade_dist))
```

```{r}
# Trade-route distances of the four city-attitude groups, in code order 0 to 3.
trade_dist_by_group <- list(
  city_0_data$trade_dist,
  city_1_data$trade_dist,
  city_2_data$trade_dist,
  city_3_data$trade_dist
)

# Evaluate one summary function on every group and return an unnamed numeric
# vector (one value per group); extra arguments are forwarded to `stat`.
group_stat <- function(stat, ...) {
  vapply(trade_dist_by_group, stat, numeric(1), ..., USE.NAMES = FALSE)
}

# One row per group: location, spread, range and number of observations.
summary_stats <- data.frame(
  group_of_cities = c(
    "Cities that Opposed Huguenots",
    "Cities with a Mixed Reaction",
    "Cities that Welcomed Huguenots",
    "Cities that Actively Recruited Huguenots"
  ),
  mean = group_stat(mean, na.rm = TRUE),
  median = group_stat(median, na.rm = TRUE),
  s.d. = group_stat(sd, na.rm = TRUE),
  IQR = group_stat(IQR, na.rm = TRUE),
  min = group_stat(min, na.rm = TRUE),
  max = group_stat(max, na.rm = TRUE),
  count = lengths(trade_dist_by_group)
)
```

## Correlation Matrix and Summary Statistics for All Variables

```{r}
# Columns that enter the correlation matrix and the overall summary table.
analysis_columns <- c(
  "city_att_original", "ruler_att_original", "longitude", "latitude",
  "trade_dist", "election", "guild_part_original", "burgher_rep_original",
  "upper_tail_human_capital_original", "church_ordinance_original",
  "city_of_residence_original", "population_loss_dichotomy",
  "regional_loss_original", "city_size_original"
)
huguenots_data_simplified <- select(huguenots_data, all_of(analysis_columns))

# Pairwise correlations: each pair uses every row where both variables are
# present.
correlation_matrix <- cor(
  huguenots_data_simplified,
  use = "pairwise.complete.obs"
)
correlation_matrix
```

```{r}
# Columns of huguenots_data_simplified to summarise, in table order.
summary_columns <- c(
  "city_att_original", "ruler_att_original", "longitude", "latitude",
  "trade_dist", "election", "guild_part_original", "burgher_rep_original",
  "upper_tail_human_capital_original", "church_ordinance_original",
  "city_of_residence_original", "population_loss_dichotomy",
  "regional_loss_original", "city_size_original"
)

# The selected columns as an unnamed list, so the statistics below come back
# as unnamed vectors and the table keeps default row names.
summary_values <- unname(as.list(huguenots_data_simplified[summary_columns]))

# Evaluate one summary function on every column and return an unnamed numeric
# vector (one value per column); extra arguments are forwarded to `stat`.
column_stat <- function(stat, ...) {
  vapply(summary_values, stat, numeric(1), ..., USE.NAMES = FALSE)
}

# One row per variable. Every statistic ignores missing values, and `count` is
# the number of non-missing observations the statistics are based on (not the
# total number of rows, which would be identical for every variable).
total_summary_stats <- data.frame(
  variable = c(
    "city_att", "ruler_att", "longitude", "latitude", "trade_dist",
    "election", "guild_part", "burgher_rep",
    "upper_tail_human_capital", "church_ordinance", "city_of_residence",
    "population_loss", "regional_loss", "city_size"
  ),
  mean = column_stat(mean, na.rm = TRUE),
  median = column_stat(median, na.rm = TRUE),
  s.d. = column_stat(sd, na.rm = TRUE),
  min = column_stat(min, na.rm = TRUE),
  max = column_stat(max, na.rm = TRUE),
  count = vapply(
    summary_values,
    function(values) sum(!is.na(values)),
    integer(1),
    USE.NAMES = FALSE
  )
)
```

## Cross-Tabs with Chi-Square and Fisher's Exact Tests

```{r}
# city attitudes x ruler attitudes
# Keep only the cities where both the ruler's and the city's attitude are
# recorded.
huguenots_filtered_data_2 <- filter(
  huguenots_data,
  !is.na(ruler_att),
  !is.na(city_att)
)
city_att_2 <- huguenots_filtered_data_2$city_att
ruler_att <- huguenots_filtered_data_2$ruler_att
city_ruler_cross_tab <- data.frame(
  city_att_2 = city_att_2,
  ruler_att = ruler_att
)
# The remotely hosted script presumably defines crosstab(), used right below.
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(city_ruler_cross_tab, row.vars = "city_att_2", col.vars = "ruler_att")

# Association tests on the *_original columns. The table() calls stay inline
# because the "data:" label in the printed result is taken from the argument.
chisq.test(table(huguenots_filtered_data_2$city_att_original, huguenots_filtered_data_2$ruler_att_original))
fisher.test(table(huguenots_filtered_data_2$city_att_original, huguenots_filtered_data_2$ruler_att_original))
```

```{r}
# city attitudes x guild power
# Cities with a recorded attitude; this subset and city_att are reused by the
# chunks that follow.
huguenots_filtered_data <- filter(huguenots_data, !is.na(city_att))
city_att <- huguenots_filtered_data$city_att
guild_part <- huguenots_filtered_data$guild_part
city_guild_cross_tab <- data.frame(
  city_att = city_att,
  guild_part = guild_part
)
# The remotely hosted script presumably defines crosstab(), used right below.
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(city_guild_cross_tab, row.vars = "city_att", col.vars = "guild_part")

# Association tests on the *_original columns. The table() calls stay inline
# because the "data:" label in the printed result is taken from the argument.
chisq.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$guild_part_original))
fisher.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$guild_part_original))
```

```{r}
# city attitudes x participation index 1700
# Uses city_att and huguenots_filtered_data from the guild-power chunk.
part_index_1700 <- huguenots_filtered_data$participation_index_1700
city_part_1700_cross_tab <- data.frame(
  city_att = city_att,
  part_index_1700 = part_index_1700
)
# The remotely hosted script presumably defines crosstab(), used right below.
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(
  city_part_1700_cross_tab,
  row.vars = "city_att",
  col.vars = "part_index_1700"
)
```

```{r}
# city attitudes x population loss
# Uses city_att and huguenots_filtered_data from the guild-power chunk.
population_loss <- huguenots_filtered_data$population_loss
city_pop_loss_cross_tab <- data.frame(
  city_att = city_att,
  population_loss = population_loss
)
# The remotely hosted script presumably defines crosstab(), used right below.
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(
  city_pop_loss_cross_tab,
  row.vars = "city_att",
  col.vars = "population_loss"
)

# The tests use the population_loss_dichotomy column, while the cross-tab above
# uses population_loss. The table() calls stay inline because the "data:" label
# in the printed result is taken from the argument.
chisq.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$population_loss_dichotomy))
fisher.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$population_loss_dichotomy))
```

```{r}
# city attitudes x church ordinance by 1530
# Uses city_att and huguenots_filtered_data from the guild-power chunk.
church_ordinance <- huguenots_filtered_data$church_ordinance
city_church_cross_tab <- data.frame(
  city_att = city_att,
  church_ordinance = church_ordinance
)
# The remotely hosted script presumably defines crosstab(), used right below.
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(
  city_church_cross_tab,
  row.vars = "city_att",
  col.vars = "church_ordinance"
)

# Association tests on the *_original columns. The table() calls stay inline
# because the "data:" label in the printed result is taken from the argument.
chisq.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$church_ordinance_original))
fisher.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$church_ordinance_original))
```

```{r}
# city attitudes x upper-tail human capital
# Uses city_att and huguenots_filtered_data from the guild-power chunk.
upper_tail_human_capital <- huguenots_filtered_data$upper_tail_human_capital
city_upper_tail_cross_tab <- data.frame(
  city_att = city_att,
  upper_tail_human_capital = upper_tail_human_capital
)
# The remotely hosted script presumably defines crosstab(), used right below.
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(
  city_upper_tail_cross_tab,
  row.vars = "city_att",
  col.vars = "upper_tail_human_capital"
)

# The tests use the upper_tail_human_capital_dichotomy column, while the
# cross-tab above uses upper_tail_human_capital. The table() calls stay inline
# because the "data:" label in the printed result is taken from the argument.
chisq.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$upper_tail_human_capital_dichotomy))
fisher.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$upper_tail_human_capital_dichotomy))
```

```{r}
# city attitudes x election
# Uses city_att and huguenots_filtered_data from the guild-power chunk.
election <- huguenots_filtered_data$election
city_election_cross_tab <- data.frame(
  city_att = city_att,
  election = election
)
# The remotely hosted script presumably defines crosstab(), used right below.
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(city_election_cross_tab, row.vars = "city_att", col.vars = "election")

# Association tests. The table() calls stay inline because the "data:" label in
# the printed result is taken from the argument.
chisq.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$election))
fisher.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$election))
```

```{r}
# city attitudes x burgher representation
# Uses city_att and huguenots_filtered_data from the guild-power chunk.
burgher_rep <- huguenots_filtered_data$burgher_rep
city_burgher_cross_tab <- data.frame(
  city_att = city_att,
  burgher_rep = burgher_rep
)
# The remotely hosted script presumably defines crosstab(), used right below.
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(city_burgher_cross_tab, row.vars = "city_att", col.vars = "burgher_rep")

# NOTE(review): these tests read burgher_rep, whereas the summary table uses
# burgher_rep_original - confirm which column is intended here.
# The table() calls stay inline because the "data:" label in the printed result
# is taken from the argument.
chisq.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$burgher_rep))
fisher.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$burgher_rep))
```

```{r}
# city attitudes x city size
# Uses city_att and huguenots_filtered_data from the guild-power chunk.
city_size <- huguenots_filtered_data$city_size
# Stored under its own name so the burgher-representation table built in the
# previous chunk (city_burgher_cross_tab) is no longer overwritten.
city_size_cross_tab <- data.frame(
  city_att = city_att,
  city_size = city_size
)
# The remotely hosted script presumably defines crosstab(), used right below.
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(city_size_cross_tab, row.vars = "city_att", col.vars = "city_size")

# Association tests on the *_original columns. The table() calls stay inline
# because the "data:" label in the printed result is taken from the argument.
chisq.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$city_size_original))
fisher.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$city_size_original))
```

```{r}
# city attitudes x city of residence
# Uses city_att and huguenots_filtered_data from the guild-power chunk.
city_of_residence <- huguenots_filtered_data$city_of_residence
city_residence_cross_tab <- data.frame(
  city_att = city_att,
  city_of_residence = city_of_residence
)
# The remotely hosted script presumably defines crosstab(), used right below.
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(
  city_residence_cross_tab,
  row.vars = "city_att",
  col.vars = "city_of_residence"
)

# Association tests on the *_original columns. The table() calls stay inline
# because the "data:" label in the printed result is taken from the argument.
chisq.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$city_of_residence_original))
fisher.test(table(huguenots_filtered_data$city_att_original, huguenots_filtered_data$city_of_residence_original))

# city attitudes (binary/dichotomy) x city of residence
# Same comparison, with the city attitude taken from city_att_dichotomy.
city_att_dichotomy <- huguenots_filtered_data$city_att_dichotomy
city_residence_binary_cross_tab <- data.frame(
  city_att_dichotomy = city_att_dichotomy,
  city_of_residence = city_of_residence
)
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(
  city_residence_binary_cross_tab,
  row.vars = "city_att_dichotomy",
  col.vars = "city_of_residence"
)

chisq.test(table(huguenots_filtered_data$city_att_dichotomy, huguenots_filtered_data$city_of_residence_original))
fisher.test(table(huguenots_filtered_data$city_att_dichotomy, huguenots_filtered_data$city_of_residence_original))
```















